"""
Author： Mr.Sun
Datetime： 2023/5/10 10:11 
FileName: key_word_match.py
Desc:  关键词匹配
"""
import cpca
import dataset

from .data_operate import base_dir
from .decorators import except_decorator
from .logger import logger


@except_decorator
class KeywordMatch(object):
    """Map a recruitment-notice title to a keyword category.

    The keyword list is read from the ``key_words`` table of the SQLite
    database located at ``base_dir``.
    """

    def __init__(self):
        # SQLite connection via ``dataset``; WAL mode is explicitly disabled and
        # the connection is restricted to the creating thread.
        self.db = dataset.connect('sqlite:///{}'.format(base_dir),
                                  engine_kwargs={'connect_args': {'check_same_thread': True}},
                                  sqlite_wal_mode=False)
        # Selects keywords whose status is 1 (presumably "enabled" -- confirm
        # against the table definition).
        self.query = "select key_word from key_words where status=1"
        self.table = self.db['key_words']

    def key_word_match(self, title=None) -> str:
        """Return the keyword category that matches ``title``.

        Resolution order:

        1. A missing/empty title, or one containing any excluded word,
           yields the string ``"None"``.
        2. A title containing a campus-related word yields ``"大学招聘"``.
        3. Otherwise the title is parsed with ``cpca`` and the detected city,
           then province, is returned if it is present in the keyword table.
        4. If nothing matches, the string ``"None"`` is returned.

        :param title: notice title to classify; ``None`` or ``""`` is treated
            as "no match".
        :return: the matched keyword, ``"大学招聘"``, or the literal string
            ``"None"`` (a string sentinel, not the ``None`` object).
        """
        in_words = ("学院", "大学", "校园")
        not_in_words = ("应届", "选调生", "优秀高校毕业生", "2024届", "2025届", "2026届", '临时', '拟录用', '博士',
                        '实习')

        # The parameter defaults to None; substring tests on None would raise
        # TypeError, so report "no match" instead.
        if not title:
            return "None"

        # Excluded words take priority over everything else, so test them once
        # up front instead of in two separate branches.
        if any(word in title for word in not_in_words):
            return "None"
        if any(word in title for word in in_words):
            return "大学招聘"

        df = cpca.transform([title])
        logger.info('cpca 解析结果' + str(df))
        province = df.at[0, '省']
        city = df.at[0, '市']

        # NOTE: the keyword table is re-queried on every call; this is
        # inefficient and could be optimized (e.g. cached) if the table is
        # known not to change at runtime.
        key_words = [row['key_word'] for row in self.db.query(self.query)]

        # Prefer the more specific city match over the province match.
        for place in (city, province):
            # cpca may yield a non-string/empty value for an undetected field
            # (assumption -- depends on cpca version); skip those so a NULL or
            # empty key_word row can never "match" and break the log call.
            if isinstance(place, str) and place and place in key_words:
                logger.info(title + " 匹配到的关键词是: " + place)
                return place

        logger.info(title + " 未匹配到关键词.")
        return "None"

#
# if __name__ == '__main__':
#     print(KeywordMatch().key_word_match("2023年浙江湖州安吉县卫生健康系统下属事业单位招聘31人公告"))
